# Crawl Baidu image-search results for the keyword 街拍 (street snap) and download the thumbnails
import os,time,json
import requests
from urllib.parse import urlencode
from urllib.request import urlretrieve
import ssl

# Replace the factory that builds the default HTTPS context with the
# "unverified" one, so HTTPS requests made through the standard library's
# default context skip certificate verification.
# NOTE(review): this is a process-wide change that disables TLS certificate
# checks for every caller relying on the default context -- confirm that this
# is intentional and acceptable.
ssl._create_default_https_context = ssl._create_unverified_context


def getPage(pn, timeout=10):
    '''Fetch one page of Baidu image-search results for the keyword 街拍.

    Args:
        pn: Integer offset of the first result to request. It is sent as the
            ``pn`` query parameter and, in hexadecimal form, as ``gsm``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON payload when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: On connection errors or when the timeout
            expires.
        ValueError: If the response body is not valid UTF-8 or not valid JSON.
    '''
    # Query-string parameters for the search endpoint.
    params = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'queryWord': '街拍',
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '0',
        'word': '街拍',
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        'expermode': '',
        'pn': pn,
        'rn': '30',
        'gsm': hex(pn),
        # presumably a cache-busting timestamp copied from a browser session
        '1543131287389': '',
    }

    # Request headers copied from a browser session.
    headers = {
        'Cookie': 'BDqhfp=%E8%A1%97%E6%8B%8D%26%260-10-1undefined%26%260%26%261; userFrom=null; cleanHistoryStatus=0; indexPageSugList=%5B%22%E8%A1%97%E6%8B%8D%22%5D; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; firstShowTip=1; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; H_PS_PSSID=26523_1467_21090_22158; delPer=0; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; PSINO=7; BDSFRCVID=QJFsJeCCxG3t4Nn7UmSM-vstRACH6AQX2liC3J; H_BDCLCKID_SF=tbkD_C-MfIvhDRTvhCcjh-FSMgTBKI62aKDs-D8M-hcqEpO9QTbMjMCHjn_qWPcuyHTIXpjnWIQNVfP4h-rTDUThDNtDt60jfn3tWJTOaj6jDbTnMIT8bKCShUFsQxTl-2Q-5hOy3KOMMhrkjpoV0UrD5lbJQfomWncCbxP-anTThpFu-n5jHjQ-eHjP; BAIDUID=6AF5A77AF1D4FA4CC7B04EF0CBA6F50A:FG=1; BIDUPSID=6AF5A77AF1D4FA4CC7B04EF0CBA6F50A; PSTM=1528357108',
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Language': 'zh-cn',
        'Host': 'image.baidu.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15',
        'Referer': 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1543131275675_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E8%A1%97%E6%8B%8D',
        # NOTE(review): this advertises brotli ('br'); the body can only be
        # decoded transparently if a brotli decoder is available to the HTTP
        # stack -- confirm, or drop 'br' from this header.
        'Accept-Encoding': 'br, deflate',
        'Connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Build the full request URL.
    url = 'https://image.baidu.com/search/acjson?' + urlencode(params)

    res = requests.get(url, headers=headers, timeout=timeout)
    if res.status_code != 200:
        # Callers must be prepared for a missing page.
        return None
    return json.loads(res.content.decode("utf-8"))


def getImage(json):
    '''Yield one record per usable image found in a result payload.

    Args:
        json: The payload returned by ``getPage`` (a dict with a ``"data"``
            list), or ``None`` when the page could not be fetched. The
            parameter name shadows the ``json`` module inside this function;
            it is kept so existing callers continue to work.

    Yields:
        Dicts with two keys: ``'image'`` (the entry's ``thumbURL``) and
        ``'title'`` (the entry's ``fromPageTitleEnc``). Entries that are
        empty or carry no ``thumbURL`` are skipped, because there is nothing
        to download for them.
    '''
    # getPage returns None on a non-200 answer; treat that as "no images".
    if not json:
        return

    for item in json.get("data") or ():
        if not item:
            # Skip empty placeholder entries.
            continue
        image = item.get("thumbURL")
        if not image:
            continue
        yield {
            'image': image,
            'title': item.get("fromPageTitleEnc"),
        }

def saveImage(item):
    '''Download one image into ``./mypic/<title>/``.

    Args:
        item: Mapping with an ``'image'`` key (the URL to download) and a
            ``'title'`` key (used as the name of the target sub-directory).

    Returns:
        None. Items without an image URL are skipped. A missing title makes
        the file land directly in ``./mypic/``.

    Raises:
        OSError: If the directory cannot be created or the download fails
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    '''
    image_url = item.get("image")
    if not image_url:
        # Nothing to download for this entry.
        return

    # The title comes from the remote page, so it may contain path separators
    # or characters that are illegal in file names on common platforms.
    # Replace them, and strip surrounding dots/spaces so a title such as
    # ".." cannot escape the download directory.
    unsafe = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
    title = (item.get('title') or "").translate(unsafe).strip(" .")

    # makedirs also creates the parent "./mypic" folder on first use and
    # tolerates a directory that already exists.
    path = os.path.join("./mypic/", title)
    os.makedirs(path, exist_ok=True)

    # The file name is the last path segment of the image URL.
    save_pic = os.path.join(path, image_url.split("/")[-1])

    # Fetch the image and write it to disk.
    urlretrieve(image_url, save_pic)

def main(pn):
    '''Fetch the result page starting at offset ``pn`` and save its images.

    Args:
        pn: Result offset forwarded to ``getPage``.
    '''
    page = getPage(pn)
    if page is None:
        # getPage returns None when the server did not answer with HTTP 200;
        # there is nothing to extract in that case.
        return

    for item in getImage(page):
        print(item)
        saveImage(item)
    

if __name__ == "__main__":
    # Process five consecutive result pages (offsets 0, 30, ..., 120),
    # pausing one second after each page.
    for offset in range(0, 5 * 30, 30):
        main(pn=offset)
        time.sleep(1)
        